
###############################
## INDEX PLOTS for OMTR ALL  ##  
###############################
# Labussiere, Levels and Vink 2021

# This script produces the index plots of transition and state elements by cluster membership
# INPUT: OMTR_clusters.dta (from clustering.do)
# OUTPUTS: Figures 2 (manuscript) and S5 (Supplementary Materials)

rm(list = ls())	

# Libraries
library(TraMineR)
library(foreign)
library(RColorBrewer)
library(Hmisc)
library(plyr)

# INPUT dataset: individuals in rows, variables in column
my_data <- read.dta("OMTR_clusters.dta")

# Extraction relevant information 
# Personal identifier
rinpersoon <- as.integer(my_data$rinpersoon)
# Cluster membership
cluster_6 <- as.integer(my_data$pam_ward_R6)

# State elements for each date
track2008 <- as.integer(my_data$track2008)
track2009 <- as.integer(my_data$track2009)
track2010 <- as.integer(my_data$track2010)
track2011 <- as.integer(my_data$track2011)
track2012 <- as.integer(my_data$track2012)
track2013 <- as.integer(my_data$track2013)
track2014 <- as.integer(my_data$track2014)
track2015 <- as.integer(my_data$track2015)
track2016 <- as.integer(my_data$track2016)

# Transition elements for each date
transition2008 <- as.integer(my_data$transition2008)
transition2009 <- as.integer(my_data$transition2009)
transition2010 <- as.integer(my_data$transition2010)
transition2011 <- as.integer(my_data$transition2011)
transition2012 <- as.integer(my_data$transition2012)
transition2013 <- as.integer(my_data$transition2013)
transition2014 <- as.integer(my_data$transition2014)
transition2015 <- as.integer(my_data$transition2015)
transition2016 <- as.integer(my_data$transition2016)


#######################
# SEQUENCE INDEXPLOTS #
#######################

# Declare the dataset
my_data_transitions <- data.frame(rinpersoon, cluster_6, transition2008, transition2009, transition2010, transition2011, transition2012, transition2013, transition2014, transition2015, transition2016)
names(my_data_transitions)

cluster_6 <- factor(cluster_6, levels = c(1,2,3,4,5,6), labels = c("Downward (5.3%)",
                                                                  "Discontinued (9.1%)",
                                                                   "Detour (2.8%)",
                                                                   "Dropout (13.2%)",
                                                                   "Upward (19.6%)",
                                                                   "Standard (50.1%)"))

cluster_6_order <- factor(my_data$pam_ward_R6, levels = c(6,5,4,2,1,3), labels = c("Standard (50.1%)",
                                                                   "Upward (19.6%)",
                                                                   "Dropout (13.2%)",
                                                                   "Discontinued (9.1%)",
                                                                   "Downward (5.3%)",
                                                                   "Detour (2.8%)"))                                                                  
# Prepare the labels
transitions_labels <- c("Standard", "Downward", "Upward", 
                        "Exit", "Dropout", "Start VMBO", "Start Bridge", "Start HAVO/VWO")


# Declare the data in the STS format
my_data_seq_transitions <- seqdef(my_data_transitions, 
                                  var= cbind("transition2008", "transition2009", "transition2010",
                                             "transition2011", "transition2012", "transition2013", 
                                             "transition2014", "transition2015", "transition2016"),
                                  labels=transitions_labels)                             
# color palette
a <- brewer.pal(8, "Set2")
attr(my_data_seq_transitions, "cpal") <- a

years_labels <- c("2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", "2016")

# Mean time spent in each element
seqmtplot(my_data_seq_transitions, group = cluster_6)


  ##############
  ## FIGURE 2 ##
  ##############
# Index plots by clusters with transition elements
seqplot(my_data_seq_transitions, type = "I", group = cluster_6_order, sortv="from.start", with.legend=TRUE, ylab=NA, yaxis=FALSE, xtlab = years_labels)



# ADDITIONAL CHECKS FOR CLUSTER DESCRIPTION

    # Frequency plot and transition elements distribution for dropout
    my_data_transitions_dropouts <- my_data_transitions[which(my_data_transitions$cluster_6==4),]
    my_data_seq_transitions_dropouts <- seqdef(my_data_transitions_dropouts, 
                                      var= cbind("transition2008", "transition2009", "transition2010",
                                                 "transition2011", "transition2012", "transition2013", 
                                                 "transition2014", "transition2015", "transition2016"),
                                      labels=transitions_labels)    

    seqplot(my_data_seq_transitions_dropouts, type = "f", with.legend=TRUE, idxs=1:30)

    seqstatd(my_data_seq_transitions_dropouts)
    seqtab(my_data_seq_transitions_dropouts, idxs=1:30)

    # Frequency plot and transition elements distribution for upward movers
    my_data_transitions_upward <- my_data_transitions[which(my_data_transitions$cluster_6==5),]
    my_data_seq_transitions_upward <- seqdef(my_data_transitions_upward, 
                                               var= cbind("transition2008", "transition2009", "transition2010",
                                                          "transition2011", "transition2012", "transition2013", 
                                                          "transition2014", "transition2015", "transition2016"),
                                               labels=transitions_labels)  
    seqstatd(my_data_seq_transitions_upward)
    seqplot(my_data_seq_transitions_upward, type = "f", with.legend=TRUE, idxs=1:15)



#####################
# STATES INDEXPLOTS #
#####################

# Declare the dataset
my_data_states <- data.frame(rinpersoon, cluster_6, track2008, track2009, track2010, track2011, track2012, track2013, track2014, track2015, track2016)
names(my_data_states)

cluster_6 <- factor(cluster_6, levels = c(1,2,3,4,5,6), labels = c("Downward",
                                                                   "Discontinued",
                                                                   "Detour",
                                                                   "Dropout",
                                                                   "Upward",
                                                                   "Standard"))

cluster_6_order <- factor(my_data$pam_ward_R6, levels = c(6,5,4,2,1,3), labels = c("Standard (50.1%)",
                                                                                   "Upward (19.6%)",
                                                                                   "Dropout (13.2%)",
                                                                                   "Discontinued (9.1%)",
                                                                                   "Downward (5.3%)",
                                                                                   "Detour (2.8%)"))   
# Prepare the labels
states_labels <- c("Bridge",
                   "VMBO",
                   "VMBO-t",	
                   "HAVO",
                   "VWO",
                   "MBO",
                   "MBO-mm",
                   "HBO",
                   "WO",
                   "Deregistration",
                   "Outward",
                   "Dropout",
                   "Out of school"
)

states_labels_detailed <- c("Bridge",
                   "Preparatory vocational (VMBO)",
                   "Preparatory Vocational + (VMBO-t)",	
                   "General education (HAVO)",
                   "Pre-university education (VWO)",
                   "Senior vocational education (MBO)",
                   "Senior vocational education + (MBO-mm)",
                   "Vocational college (HBO)",
                   "University (WO)",
                   "Deregistration",
                   "Outward",
                   "Dropout",
                   "Out of school"
)

# Declare the data in the STS format
my_data_seq_states <- seqdef(my_data_states, var= cbind("track2008", "track2009", "track2010","track2011", "track2012", 
                                                        "track2013", "track2014", "track2015", "track2016"),
                             labels=states_labels_detailed)

# Add more colors to the palette
c <- brewer.pal(12, "Paired")
attr(my_data_seq_states, "cpal") <- c(c, "gray")


# Mean time spent in each element
seqmtplot(my_data_seq_states, group = cluster_6)


years_labels <- c("2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", "2016")


  ##############
  ## FIGURE S5 #
  ##############
# Index plots by clusters with state elements
seqplot(my_data_seq_states, type = "I", group = cluster_6_order, sortv="from.start", with.legend=TRUE, ncol=3, ylab=NA, yaxis=FALSE, xtlab = years_labels, cex.legend=0.9)


# ADDITIONAL CHECKS FOR CLUSTER DESCRIPTION

    # State element distribution for Detour trajectories
    my_data_states_Uturners <- my_data_states[which(my_data_states$cluster_6==3),]
    my_data_seq_states_Uturners <- seqdef(my_data_states_Uturners, 
                                               var= cbind("track2008", "track2009", "track2010",
                                                          "track2011", "track2012", "track2013", 
                                                          "track2014", "track2015", "track2016"),
                                               labels=states_labels)    
    seqstatd(my_data_seq_states_Uturners)



